// Parallel HTML Parsing with HSV
// Proof-of-concept demonstrating parallel parsing of HTML-like structured documents.
//
// Copyright (c) 2026 Danslav Slavenskoj, Lingenic LLC
// https://lingenic.com
//
// Dedicated to the public domain under CC0 1.0
// https://creativecommons.org/publicdomain/zero/1.0/
//
// See: https://hsvfile.com/html.html

package htmlparallel

import (
	"fmt"
	"strings"
	"sync"
	"testing"
	"time"
)

// HSV control characters. Each constant is an ASCII C0 control code named
// after its ASCII abbreviation; the role noted beside it is the one the
// builder, splitter and parser in this file give it.
const (
	cSTX = '\x02' // STX (start of text): first byte of a framed document
	cETX = '\x03' // ETX (end of text): last byte of a framed document
	cSO  = '\x0e' // SO (shift out): opens a nested structure
	cSI  = '\x0f' // SI (shift in): closes a nested structure
	cRS  = '\x1e' // RS (record separator): separates properties
	cUS  = '\x1f' // US (unit separator): separates a key from its value
	cFS  = '\x1c' // FS (file separator): separates top-level records
)

// buildHSVDocument returns a synthetic HSV document with the given number of
// sections, standing in for the output of a real HTML-to-HSV converter.
//
// The document is framed by STX ... ETX and consecutive sections are
// separated by FS. Each section has the form
//
//	html:section␟␎html:id␟s{i}␞html:h2␟Section {i}␞html:p␟Content {i} with "quotes" & <angles>␏
//
// where {i} is the zero-based section number. A sections value of zero or
// less yields only the STX/ETX frame.
func buildHSVDocument(sections int) string {
	var b strings.Builder
	b.WriteByte(cSTX)

	for i := 0; i < sections; i++ {
		if i > 0 {
			// The separator goes between sections, never after the last one.
			b.WriteByte(cFS)
		}

		// Container property: its value is the nested block opened by SO.
		b.WriteString("html:section")
		b.WriteByte(cUS)
		b.WriteByte(cSO)

		// fmt.Fprintf formats straight into the builder, so no temporary
		// string is allocated per field.
		b.WriteString("html:id")
		b.WriteByte(cUS)
		fmt.Fprintf(&b, "s%d", i)
		b.WriteByte(cRS)

		b.WriteString("html:h2")
		b.WriteByte(cUS)
		fmt.Fprintf(&b, "Section %d", i)
		b.WriteByte(cRS)

		// Paragraph text deliberately contains characters that HTML would
		// require to be escaped; they are written as-is.
		b.WriteString("html:p")
		b.WriteByte(cUS)
		fmt.Fprintf(&b, `Content %d with "quotes" & <angles>`, i)

		b.WriteByte(cSI)
	}

	b.WriteByte(cETX)
	return b.String()
}

// Chunk is one piece of an HSV document, as produced by splitIntoChunks, that
// can be handed to parseChunk independently of the other pieces.
type Chunk struct {
	// Data holds the raw bytes of the piece, control characters included.
	Data  string
	// Index is the zero-based position of the piece among the chunks that
	// splitIntoChunks returned.
	Index int
}

// ParsedSection is the result parseChunk produces for a single Chunk.
type ParsedSection struct {
	// Index is copied from the Chunk that was parsed.
	Index int
	// Props maps the property keys found in the chunk to their values.
	Props map[string]string
}

// splitIntoChunks cuts an HSV document into its top-level records so that
// each one can be parsed independently.
//
// A leading STX and a trailing ETX are removed if present. The remainder is
// split at every FS that sits outside all SO ... SI blocks; an FS inside a
// nested block stays part of its chunk. Empty records (for example between
// two adjacent FS bytes) are skipped, and the remaining chunks are numbered
// consecutively from zero. The result is nil when there are no records.
//
// Chunks are substrings of data, so no bytes are copied.
//
// Note: an unmatched SI drives the nesting depth below zero, after which FS
// is no longer treated as a separator until a matching SO brings it back.
func splitIntoChunks(data string) []Chunk {
	// Remove framing
	if len(data) > 0 && data[0] == cSTX {
		data = data[1:]
	}
	if len(data) > 0 && data[len(data)-1] == cETX {
		data = data[:len(data)-1]
	}

	var chunks []Chunk
	depth := 0
	// start is the offset of the first byte of the record being scanned.
	start := 0

	for i := 0; i < len(data); i++ {
		switch data[i] {
		case cSO:
			depth++
		case cSI:
			depth--
		case cFS:
			if depth != 0 {
				// Separator belongs to a nested structure; keep scanning.
				continue
			}
			if i > start {
				chunks = append(chunks, Chunk{Data: data[start:i], Index: len(chunks)})
			}
			start = i + 1
		}
	}

	// Whatever follows the last top-level FS is the final record.
	if start < len(data) {
		chunks = append(chunks, Chunk{Data: data[start:], Index: len(chunks)})
	}

	return chunks
}

// parseChunk parses one chunk into a flat property map. It reads only its
// argument and writes only its own result, so separate chunks can be parsed
// concurrently.
//
// Grammar handled:
//   - US separates a key from its value and RS ends a property, both at the
//     top level of the chunk and one level inside an SO ... SI block. A
//     further US inside a value at these levels is dropped.
//   - When SO opens the first nesting level, the property being read (the
//     container, e.g. "html:section") is stored with whatever value preceded
//     the SO, normally the empty string, and the properties inside the block
//     are stored next to it in the same map.
//   - Anything nested two or more levels deep is not interpreted: its bytes,
//     control characters included, are kept verbatim in the field being read.
//
// Properties with an empty key are dropped. When a key occurs more than once
// the last occurrence wins. The returned Index is copied from the chunk.
func parseChunk(chunk Chunk) ParsedSection {
	result := ParsedSection{
		Index: chunk.Index,
		Props: make(map[string]string),
	}

	var key, value strings.Builder
	// inValue is false while a key is being read and true once US was seen.
	inValue := false
	// depth is the current SO/SI nesting level.
	depth := 0

	// flush stores the property read so far, provided it has a key, and
	// prepares for the next property.
	flush := func() {
		if key.Len() > 0 {
			result.Props[key.String()] = value.String()
		}
		key.Reset()
		value.Reset()
		inValue = false
	}

	data := chunk.Data
	for i := 0; i < len(data); i++ {
		c := data[i]
		switch c {
		case cSO:
			depth++
			if depth == 1 {
				// Entering the container's property list: record the
				// container itself and start reading a fresh key.
				flush()
				continue
			}
		case cSI:
			depth--
			if depth == 0 {
				// The last nested property has no trailing RS.
				flush()
				continue
			}
		case cUS:
			if depth <= 1 {
				inValue = true
				continue
			}
		case cRS:
			if depth <= 1 {
				flush()
				continue
			}
		}

		// Ordinary byte, or a control byte that belongs to deeper nesting.
		if inValue {
			value.WriteByte(c)
		} else {
			key.WriteByte(c)
		}
	}

	// A chunk that does not end in SI or RS still has a pending property.
	flush()

	return result
}

// parseSequential splits data with splitIntoChunks and runs parseChunk on
// each chunk in turn, on the calling goroutine. The returned slice holds one
// result per chunk, in chunk order.
func parseSequential(data string) []ParsedSection {
	pieces := splitIntoChunks(data)
	parsed := make([]ParsedSection, 0, len(pieces))
	for n := range pieces {
		parsed = append(parsed, parseChunk(pieces[n]))
	}
	return parsed
}

// parseParallel splits data with splitIntoChunks and parses every chunk on
// its own goroutine, returning once all of them have finished.
//
// Each goroutine stores its result in the slot selected by the chunk's Index.
// splitIntoChunks numbers chunks consecutively from zero, so every goroutine
// writes a different element and the slice needs no lock.
func parseParallel(data string) []ParsedSection {
	pieces := splitIntoChunks(data)
	parsed := make([]ParsedSection, len(pieces))

	var pending sync.WaitGroup
	pending.Add(len(pieces))
	for n := range pieces {
		go func(piece Chunk) {
			defer pending.Done()
			parsed[piece.Index] = parseChunk(piece)
		}(pieces[n])
	}
	pending.Wait()

	return parsed
}

// TestHSVHTMLBasic builds a small document, splits it and parses each chunk,
// logging the intermediate data. It fails if the number of chunks differs
// from the number of sections built, if chunk indices are not consecutive,
// if parseChunk does not carry the chunk index through, or if a chunk yields
// no properties at all.
func TestHSVHTMLBasic(t *testing.T) {
	const sections = 3

	hsv := buildHSVDocument(sections)
	t.Logf("HSV document (%d bytes): %q", len(hsv), hsv)

	chunks := splitIntoChunks(hsv)
	t.Logf("Split into %d chunks", len(chunks))
	if len(chunks) != sections {
		t.Fatalf("Expected %d chunks, got %d", sections, len(chunks))
	}

	for i, chunk := range chunks {
		t.Logf("Chunk %d: %q", i, chunk.Data)
		if chunk.Index != i {
			t.Errorf("Chunk %d: Index = %d, want %d", i, chunk.Index, i)
		}

		parsed := parseChunk(chunk)
		t.Logf("  Parsed: %+v", parsed.Props)
		if parsed.Index != chunk.Index {
			t.Errorf("Chunk %d: parsed Index = %d, want %d", i, parsed.Index, chunk.Index)
		}
		if len(parsed.Props) == 0 {
			t.Errorf("Chunk %d: no properties parsed", i)
		}
	}
}

// TestParallelVsSequential parses the same generated document with
// parseSequential and parseParallel and requires the two results to match
// exactly: same number of sections, same indices, and the same set of
// key/value pairs in every section. Wall-clock timings of a single run of
// each are logged for information only.
func TestParallelVsSequential(t *testing.T) {
	// Generate document with many sections
	const numSections = 100
	hsv := buildHSVDocument(numSections)
	t.Logf("Generated HSV: %d bytes, %d sections", len(hsv), numSections)

	// Verify chunking
	chunks := splitIntoChunks(hsv)
	if len(chunks) != numSections {
		t.Fatalf("Expected %d chunks, got %d", numSections, len(chunks))
	}
	t.Logf("Split into %d chunks (correct)", len(chunks))

	// Parse sequentially
	start := time.Now()
	seqResults := parseSequential(hsv)
	seqTime := time.Since(start)

	// Parse in parallel
	start = time.Now()
	parResults := parseParallel(hsv)
	parTime := time.Since(start)

	t.Logf("Sequential: %v", seqTime)
	t.Logf("Parallel:   %v", parTime)

	// Verify results are identical
	if len(seqResults) != len(parResults) {
		t.Fatalf("Result count mismatch: seq=%d, par=%d", len(seqResults), len(parResults))
	}

	for i := range seqResults {
		seq, par := seqResults[i], parResults[i]
		if seq.Index != par.Index {
			t.Errorf("Index mismatch at %d", i)
		}
		// Equal sizes plus "every sequential pair is present in the parallel
		// map" means the two maps are equal; the size check is what catches
		// keys that exist only in the parallel result.
		if len(seq.Props) != len(par.Props) {
			t.Errorf("Property count mismatch at section %d: seq=%d, par=%d", i, len(seq.Props), len(par.Props))
		}
		for k, v := range seq.Props {
			// The comma-ok form tells a missing key apart from an empty value.
			got, ok := par.Props[k]
			if !ok || got != v {
				t.Errorf("Property mismatch at section %d, key %q", i, k)
			}
		}
	}

	// Only claim success when no mismatch was reported above.
	if !t.Failed() {
		t.Log("VERIFIED: Sequential and parallel parsing produce identical results")
	}
}

// TestNoEscapingRequired checks that text containing characters HTML would
// have to escape (angle brackets, ampersands, double quotes) is carried
// through HSV unchanged: the raw document must contain the literal text, and
// splitting plus parsing must return each paragraph exactly as written.
func TestNoEscapingRequired(t *testing.T) {
	// Paragraph texts, in document order: literal angle brackets, literal
	// ampersands, literal quotes.
	contents := []string{
		`Use <div> for containers`,
		`A & B & C`,
		`Link with "quotes"`,
	}

	// Build HSV with content that would need escaping in HTML: one html:p
	// record per text, FS between records, framed by STX/ETX.
	var b strings.Builder
	b.WriteByte(cSTX)
	for i, content := range contents {
		if i > 0 {
			b.WriteByte(cFS)
		}
		b.WriteString("html:p")
		b.WriteByte(cUS)
		b.WriteString(content)
	}
	b.WriteByte(cETX)

	hsv := b.String()
	t.Logf("HSV with special chars: %q", hsv)

	// Verify the raw HSV contains the literal characters
	checks := []struct {
		name string
		text string
	}{
		{"angle brackets", "<div>"},
		{"ampersand", "&"},
		{"quotes", `"quotes"`},
	}

	for _, c := range checks {
		if strings.Contains(hsv, c.text) {
			t.Logf("PRESERVED in HSV: %s (%s) - no escaping needed", c.text, c.name)
		} else {
			t.Errorf("NOT FOUND in HSV: %s (%s)", c.text, c.name)
		}
	}

	// Also verify parsing extracts them
	chunks := splitIntoChunks(hsv)
	t.Logf("Split into %d chunks for parallel processing", len(chunks))
	if len(chunks) != len(contents) {
		t.Fatalf("Expected %d chunks, got %d", len(contents), len(chunks))
	}
	for i, chunk := range chunks {
		got, ok := parseChunk(chunk).Props["html:p"]
		if !ok || got != contents[i] {
			t.Errorf("Chunk %d: html:p = %q (present=%v), want %q", i, got, ok, contents[i])
		}
	}
}

func TestLinearScaling(t *testing.T) {
	sizes := []int{100, 500, 1000, 2000}

	t.Log("Size\tChunks\tSeq\t\tPar")
	t.Log("----\t------\t---\t\t---")

	for _, size := range sizes {
		hsv := buildHSVDocument(size)
		chunks := splitIntoChunks(hsv)

		// Warm up
		parseSequential(hsv)
		parseParallel(hsv)

		// Measure
		start := time.Now()
		for i := 0; i < 10; i++ {
			parseSequential(hsv)
		}
		seqTime := time.Since(start) / 10

		start = time.Now()
		for i := 0; i < 10; i++ {
			parseParallel(hsv)
		}
		parTime := time.Since(start) / 10

		t.Logf("%d\t%d\t%v\t%v", size, len(chunks), seqTime, parTime)
	}
}

// Benchmarks

// BenchmarkSequential100 times parseSequential on a 100-section document.
// The document is built before the timer is reset, so only parsing is timed.
func BenchmarkSequential100(b *testing.B) {
	const sections = 100
	input := buildHSVDocument(sections)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		parseSequential(input)
	}
}

// BenchmarkParallel100 times parseParallel on a 100-section document.
// The document is built before the timer is reset, so only parsing is timed.
func BenchmarkParallel100(b *testing.B) {
	const sections = 100
	input := buildHSVDocument(sections)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		parseParallel(input)
	}
}

// BenchmarkSequential1000 times parseSequential on a 1000-section document.
// The document is built before the timer is reset, so only parsing is timed.
func BenchmarkSequential1000(b *testing.B) {
	const sections = 1000
	input := buildHSVDocument(sections)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		parseSequential(input)
	}
}

// BenchmarkParallel1000 times parseParallel on a 1000-section document.
// The document is built before the timer is reset, so only parsing is timed.
func BenchmarkParallel1000(b *testing.B) {
	const sections = 1000
	input := buildHSVDocument(sections)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		parseParallel(input)
	}
}
